set more off
clear all
version 17.0


global ylist d_wood d_charcoal d_lpg lq_coal_hat d_qcoal lq_coal_hat2 lexp_lpg d_elpg lexp_lpg2
global ylist2 d_wood d_charcoal d_lpg lq_coal_hat d_qcoal lq_coal_hat2 totexp_lpg_year d_elpg totexp_lpg_year2
global xlist linccap lsize housing_nrooms housing_selfowned sexhead lagehead lyeduc agrihead unemphead cook_jobweeks cook_female lyeduccook elec d_road
global xlist2 linccap lsize housing_nrooms housing_selfowned sexhead lagehead lyeduc agrihead unemphead cook_jobweeks cook_female lyeduccook rural elec d_road
global ylist_d d_wood d_charcoal d_lpg q_coal_hat d_qcoal q_coal_hat2 totexp_lpg_year d_elpg totexp_lpg_year2



************************************************************************************************
*** descriptives **************************************************************************************
************************************************************************************************
*** summary stats tables 1-3 **************************************************************
use "$datain\glss_combined.dta", clear
keep if cooking!=0

replace totexp_lpg_year5 = totexp_lpg_year5/10000 if glss==5 //currency reform		
replace totexp_lpg_year5 = totexp_lpg_year5 + (1.201*totexp_lpg_year5) if glss==5 //inflation between 2006 and 2013
replace lexp_lpg=asinh(totexp_lpg_year5) if glss==5
replace lexp_lpg2=lexp_lpg if glss==5 & lexp_lpg>0 & lexp_lpg!=.


*** table 1: stats full sample, treatment and control
gen temp3=treat==0
label define temptreat 0 "Treatment" 1 "Control"
label val temp3 temptreat
stddiff $ylist $xlist2 rural if glss==6, by(temp3)
putexcel set "$output\table_1-1_summaryfullsample", replace
matrix b = r(output)
putexcel A1 = matrix(b), rownames
stddiff $ylist $xlist2 rural if glss==5, by(temp3)
putexcel set "$output\table_1-2_summaryfullsample5", replace
matrix b = r(output)
putexcel A1 = matrix(b), rownames
bysort temp3 glss: count




*** table 2: regional composition of samples

keep if glss==6 & cooking!=0
gen sample=1 if (aftercategory==2 | aftercategory==3) & cook_jobweeks!=. & cook_female!=. & region!=.
recode sample (.=0)
gen temp=sample
merge 1:1 hid using "$output\weights_eb"
drop if _merge==2

//ate
tab region  //full sample
tab region if sample==1 //eligible for matching
tab region [aweight=weight_ate] if weight_ate!=. //matched ATE

tab rural  //full sample
tab rural if sample==1 //eligible for matching
tab rural [aweight=weight_ate] if weight_ate!=. //matched ATE

//rural stats
tab region if rural==1 //full sample
tab region if rural==1 & sample==1 //eligible for matching
tab region [aweight=weight_cater] if weight_cater!=. //matched CATE(rural)

//urban stats
tab region if rural==0 //full sample
tab region if rural==0 & sample==1 //eligible for matching
tab region [aweight=weight_cateu] if weight_cateu!=. //matched CATE(urban)

*** table 3: stats control group
** outcome stats
cap gen q_coal_hat2 if d_qcoal==1
//full sample
eststo ate1: estpost sum $ylist_d if treat==0 
eststo rural1: estpost sum $ylist_d if treat==0 & rural==1
eststo urban1: estpost sum $ylist_d if treat==0 & urban==1

//eligible sample
eststo ate2: estpost sum $ylist_d if treat==0 & sample==1 
eststo rural2: estpost sum $ylist_d if treat==0 & sample==1 & rural==1
eststo urban2: estpost sum $ylist_d if treat==0 & sample==1 & urban==1

//estsample
eststo ate3: estpost sum $ylist_d [aweight=weight_ate] if treat==0 & weight_ate!=.
eststo rural3: estpost sum $ylist_d [aweight=weight_cater] if treat==0 & weight_cater!=.
eststo urban3: estpost sum $ylist_d [aweight=weight_cateu] if treat==0 & weight_cateu!=.

esttab ate1 ate2 ate3 rural1 rural2 rural3 urban1 urban2 urban3 using "$output\table_3_summary_before.txt", cells(mean(fmt(3))  sd(par fmt(3))) nonumber replace label //c nc 

** covariate stats

//full sample
eststo ate1: estpost sum $xlist2 if treat==0 
eststo rural1: estpost sum $xlist2 if treat==0 & rural==1
eststo urban1: estpost sum $xlist2 if treat==0 & urban==1

//eligible sample
eststo ate2: estpost sum $xlist2 if treat==0 & sample==1 
eststo rural2: estpost sum $xlist2 if treat==0 & sample==1 & rural==1
eststo urban2: estpost sum $xlist2 if treat==0 & sample==1 & urban==1

//estsample
eststo ate3: estpost sum $xlist2 [aweight=weight_ate] if treat==0 & weight_ate!=.
eststo rural3: estpost sum $xlist2 [aweight=weight_cater] if treat==0 & weight_cater!=.
eststo urban3: estpost sum $xlist2 [aweight=weight_cateu] if treat==0 & weight_cateu!=.

esttab ate1 ate2 ate3 rural1 rural2 rural3 urban1 urban2 urban3 using "$output\table_3_summary_before2.txt", cells(mean(fmt(3))  sd(par fmt(3))) nonumber replace label //c nc 



*** Table A7: remoteness proxies
gen eligible=0
replace eligible=1 if aftercategory==2 | aftercategory==3
replace eligible=0 if cook_jobweeks==. | cook_female==.
replace d_road=. if d_road==99
replace elec=. if elec==99
cap drop n
bysort clust: gen n=_n
bysort early50: sum d_road elec if n==1 & eligible==1
stddiff d_road elec if eligible==1 & n==1, by(early50)
putexcel set "$output\table_a7_summarylate", replace
matrix b = r(output)
putexcel A1 = matrix(b), rownames
bysort early50: count if eligible==1 & rural==1 & n==1

//correlation between late and treat
corr latetreat treat if eligible==1



*** Figure 2 : Precipitation *************************************************
//data source: https://africaopendata.org/dataset/precipitation-in-ghana-2000-2015
import excel "$datain\ghana-precipitation-2000-2015.xls", sheet("pr_1991_2015") firstrow clear
drop if time==.
replace time=time-84 if time>16 
lab define time 1 "Aug" 2 "Sep" 3 "Oct" 4 "Nov" 5 "Dec" ///
	6 "Jan" 7 "Feb" 8 "Mar" 9 "Apr" 10 "May" ///
	11 "Jun" 12 "Jul" 13 "Aug" 14 "Sep" 15 "Oct" 
label values time time
gen glss=5 if Year<"2007"
replace glss=6 if Year>"2007"


twoway line Precipitationmm time if glss==5, cmissing(no) lcolor(gs5) lpattern(dash) lstyle(foreground) lwidth(medthick) ///	
		|| line Precipitationmm time if glss==6, cmissing(no) lcolor(gs5) lstyle(foreground) lwidth(medthick) ///
		xlabel(1(2)15 , valuelabel) ///
		ytitle("Precipitation (mm per month)") xtitle("") ///
		legend(region(lcolor(white)) r(1) label(1 "GLSS5") label(2 "GLSS6") order(2 1)) ///
		graphregion(color(white)) bgcolor(white)	
graph export "$output\fig2_precipitation.pdf", replace


*** Figure 3: maps 
// glss6 
use "$datain\glss_combined.dta", clear
keep if glss==6 & cooking!=0 
gen after=treat==1

gen loc=1 if loc7==1
replace loc=2 if loc7==2 | loc7==5
replace loc=3 if loc7==3 | loc7==6
replace loc=4 if loc7==4 | loc7==7


collapse (min) glss min=loc (max) max=loc region district (p50) median=loc (mean) cdistrict border* after d_wood d_charcoal d_lpg, by(DIST_CODE) 
label define lloc 1 "Accra" 2 "Coastal"  3 "Forest" 4 "Savannah"
label val min max median lloc
drop if DIST_CODE==""
merge 1:1 DIST_CODE using "$datain\shapefiles\Districts 170\map"
drop _merge
clonevar after2=after
replace after2=2 if after2>0 & after2<1
replace after2=2 if after2==2 & cdistrict==1
*replace after2=3 if match==1
label define lafter2 0 "Ineligible (before)" 1 "Ineligible (after)" 2 "Eligible" // 3 "Eligible (matched)" //3 "Before & after, charcoal prod." 
label val after2 lafter2

*spmap min using "$datain\shapefiles\Districts 170\mapcoord", id(_ID) clmethod(unique) fcolor(navy ltblue forest_green stone) legend(pos(5)) title("GLSS6", color(black))
*graph export "$output\fig_map_loc.emf", replace
spmap min using "$datain\shapefiles\Districts 170\mapcoord", id(_ID) clmethod(unique) fcolor(gs5 gs8 gs11 gs14) legend(pos(5)) title("GLSS6", color(black))
graph export "$output\fig3-1_map_loc.emf", replace
spmap after2 using "$datain\shapefiles\Districts 170\mapcoord", id(_ID) clmethod(unique) fcolor(gs5 gs11 white) legend(pos(5)) name(full, replace) title("GLSS6", color(black))
graph export "$output\fig3-2_map_aftercategory.emf", replace

// glss 5
use "$datain\glss_combined.dta", clear
keep if glss==5 & cooking!=0 
gen after=treat==1

cap drop DIST_CODE
gen DIST_CODE=""
forvalues k=1/9{
 replace DIST_CODE="0`k'" if region==`k'
}
replace DIST_CODE="10" if region==10

forvalues k=1/9{
 replace DIST_CODE=DIST_CODE+"0`k'" if district==`k'
}
forvalues k=10/27{
 replace DIST_CODE=DIST_CODE+"`k'" if district==`k'
}
replace DIST_CODE="" if district==.
replace DIST_CODE="" if region==.

gen loc=1 if loc7==1
replace loc=2 if loc7==2 | loc7==5
replace loc=3 if loc7==3 | loc7==6
replace loc=4 if loc7==4 | loc7==7


collapse (min) glss min=loc (max) max=loc region district (p50) median=loc (mean) cdistrict border* after d_wood d_charcoal d_lpg, by(DIST_CODE) 
label define lloc 1 "Accra" 2 "Coastal"  3 "Forest" 4 "Savannah"
label val min max median lloc
drop if DIST_CODE==""
merge 1:1 DIST_CODE using "$datain\shapefiles\Districts 110\map"
drop _merge
clonevar after2=after
replace after2=2 if after2>0 & after2<1
replace after2=2 if after2==2 & cdistrict==1
*replace after2=3 if match==1
label define lafter2 0 "Ineligible (before)" 1 "Ineligible (after)" 2 "Eligible" // 3 "Eligible (matched)" //3 "Before & after, charcoal prod." 
label val after2 lafter2

spmap after2 using "$datain\shapefiles\Districts 110\mapcoord", id(_ID) clmethod(unique) fcolor(gs5 gs11 white) legend(pos(5)) name(full, replace) title("GLSS5", color(black))
graph export "$output\fig3-3_map_aftercategory5.emf", replace






*** Prices (Figure 1 & Table A6) ************************************************
set more off	

***  Figure 1: prices timeline
use "$datain\Prices_Petroproduct", clear
cd "$output"
sort time	
	
replace GASOIL=DIESELGHpLt if GASOIL==.

drop if p_petrol>4 & p_petrol!=.
// gen locals for shaded areas (one xline in mid of period with number of months as width):
local GLSS "5 6 7"
foreach g in `GLSS' {
		qui sum time if GLSS ==`g', detail
			local start`g' = `r(min)'-1
			local end`g' = `r(max)'+1
			
			local x`g' = (`end`g'' + `start`g'')/2
			local w`g' = (`end`g'' - `start`g'')
			}
bysort time: sum LPG if GLSS==7
bysort time: sum GASOIL if GLSS==7

drop if time>=101			
sum time
local end = `r(max)' 
sort time		
	twoway scatter p_petrol time, msize(tiny) msymbol(o) mcolor(gs5) /// || scatter p_kerosene time, msize(vtiny) mcolor(maroon) ///
		|| scatter p_lpg time, msize(small) msymbol(x) mcolor(gs10) ///
		|| line GASOIL time, cmissing(no) lcolor(gs5) lstyle(foreground) lwidth(medthick) ///		|| line KEROSENE time, cmissing(no) ///
		|| line LPG time, cmissing(no) lcolor(gs10) lpattern(dash) lstyle(foreground) lwidth(medthick) ///
		xlabel(2(12)`end', valuelabel angle(45)) ///
		xline(`x5', lwidth(`w5') lc(gs15)) xline(`x6', lwidth(`w6') lc(gs15)) ///
		ytitle("Price per liter/kg, in GHS (nominal)") xtitle("") ///
		legend(region(lcolor(white)) r(1) order (3 4) label(3 "Diesel (official)") label(4 "LPG (official)")) ///label(2 "Kerosene") //label(1 "Diesel (obs.)")  label(2 "LPG (obs.)")
		graphregion(color(white)) bgcolor(white)	
graph export "$output\fig1_prices_567.pdf", replace


*** Table A6: regression charcoal prices
gen loc4=2 if loc7==1 //accra
replace loc4=2 if loc7==2|loc7==5 //coastal
replace loc4=3 if loc7==3|loc7==6 //forest
replace loc4=4 if loc7==4|loc7==7 //savannah
label define lloc4 1 "Accra" 2 "Coastal" 3 "Forest" 4 "Savannah"
label val loc4 lloc4
label define lloc7 1 "Accra" 2 "Urban Coastal" 3 "Urban Forest" 4 "Urban Savannah" 5 "Rural Coastal" 6 "Rural Forest" 7 "Rural Savannah"
gen loc7_2=loc7 //Accra=urban coastal
replace loc7_2=2 if loc7==1
label val loc7 loc7_2 lloc7
gen lpcoal=asinh(p_coal)
gen ltdiesel=asinh(GASOIL)
label var ltdiesel "Log of diesel tariff"
gen ltdieselurban=ltdiesel if loc2==1
gen ltdieselrural=ltdiesel if loc2==2
cap gen urban=loc2==1
replace ltdieselurban=0 if ltdieselurban==.
replace ltdieselrural=0 if ltdieselrural==.
gen accra=loc7==1
gen treat=time>=92
gen inter=treat*accra

reghdfe lpcoal ltdiesel if GLSS==6, noabsorb vce(robust)
est store e1
reghdfe lpcoal ltdiesel if GLSS==6, absorb(loc7) vce(robust)
est store e2

outreg2 [e1 e2] using "$output\table_a6_charcoalprice.tex", ///
tex(fragment) keep(ltdiesel* urban ) dec(3) nocons replace  




*** Table A9: how well do covariates predict cooking fuel choice? 
set more off
use "$datain\glss_combined.dta", clear
keep if glss==6

keep if cooking<=3 & cooking>=1 
egen district_group=group(region district) 

replace d_road=1 if urban==1
replace elec=1 if urban==1
replace d_road=. if d_road==99
replace elec=. if elec==99

tab ethnicgroup, g(deth)
tab religion, g(drel)

set matsize 2000
mlogit cooking $xlist2 drel2 drel3 drel4 drel5 drel6 drel7 drel8 deth2 deth3 deth4 deth5 deth6 deth7 deth8 deth9 i.district_group , baseoutcome(1)
predict p
gen tag=(p>=0.9999|p<=0.0001)
drop p		

mlogit cooking  $xlist2 drel2 drel3 drel4 drel5 drel6 drel7 drel8 deth2 deth3 deth4 deth5 deth6 deth7 deth8 deth9 i.district_group  if tag==0 , baseoutcome(1) 
est sto mlog
drop tag



forval i = 1/3 {
	est res mlog
	qui margins, dydx($xlist2 drel* deth*) pr(out(`i')) post
	est sto mlog`i'
}

label var linccap "Log annual income per capita"
label var sexhead "Household head is male (=1)"
label var lagehead "Household head age (in logs)"
label var dedu1 "Education level of head: None"
label var dedu2 "Education level of head: BECE"
label var dedu3 "Education level of head: MSLC"
label var dedu4 "Education level of head: SSS"
label var dedu5 "Education level of head: Voc/Tech/Teacher"
label var dedu6 "Education level of head: Tertiary"
label var deth1 "Ethnicity of head: Akan"
label var deth2 "Ethnicity of head: Ga-Dangme"
label var deth3 "Ethnicity of head: Ewe"
label var deth4 "Ethnicity of head: Guan"
label var deth5 "Ethnicity of head: Gurma"
label var deth6 "Ethnicity of head: Mole-Dagbani"
label var deth7 "Ethnicity of head: Grusi"
label var deth8 "Ethnicity of head: Mande"
label var deth9 "Ethnicity of head: Other"
label var drel1 "Religion of head: None"
label var drel2 "Religion of head: Catholic"
label var drel3 "Religion of head: Protestant"
label var drel4 "Religion of head: Pentecostal/Charismatic"
label var drel5 "Religion of head: Other Christian"
label var drel6 "Religion of head: Islam"
label var drel7 "Religion of head: Traditional"
label var drel8 "Religion of head: Other"
label var unemphead "Household head unemployed (=1)"
label var lsize "Household size (in logs)"
label var housing_selfowned "Dwelling is self-owned"
label var housing_nrooms "Number of rooms"
label var loc7 "Location"
label var agrihead "Household head employed in agriculture"
label var lyeduccook "Cook: Years of educ."

est restore mlog1
outreg2 [mlog1] using "$output\table_a9_multinomial.tex", ctitle("Firewood") replace dec(3) 
est restore mlog2
outreg2 [mlog2] using "$output\table_a9_multinomial.tex", ctitle("Charcoal") append dec(3) 
est restore mlog3
outreg2 [mlog3] using "$output\table_a9_multinomial.tex", ctitle("LPG") append dec(3) tex(fragment) alpha(0.01, 0.05, 0.1) symbol(***, **, *) label 




*** Additional Info (not in text): How well do correlates predict being surveyed late? **************************************************************
set more off
use "$datain\glss_combined.dta", clear
keep if glss==6
reghdfe late $xlist2, absorb(location religion ethnicgroup) vce(robust)
est sto late
outreg2 [late] using "$output\table_latecorrelates.rtf", replace dec(2) tex(fragment) label adjr2


